library(vcfR)
## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.12.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****
library(SNPfiltR)
## This is SNPfiltR v.1.0.0
## 
## Detailed usage information is available at: devonderaad.github.io/SNPfiltR/ 
## 
## If you use SNPfiltR in your published work, please cite the following papers: 
## 
## DeRaad, D.A. (2022), SNPfiltR: an R package for interactive and reproducible SNP filtering. Molecular Ecology Resources, 00, 1-15. http://doi.org/10.1111/1755-0998.13618 
## 
## Knaus, Brian J., and Niklaus J. Grunwald. 2017. VCFR: a package to manipulate and visualize variant call format data in R. Molecular Ecology Resources, 17.1:44-53. http://doi.org/10.1111/1755-0998.12549
library(ggplot2)

Filter input SNP files based on MAC to increase signal to noise ratio

#bring in sample info
#read in sample info csv
sample.info<-read.csv("~/Desktop/mex.towhees/sample.locs.csv")

#filter input vcf by MAC in order to increase signal to noise ratio for ADMIXTURE analysis
#read in filtered vcf
vcfR <- read.vcfR("~/Desktop/mex.towhees/towhee.75.mac2.unlinked.nomito.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 4800
##   header_line: 4801
##   variant count: 2668
##   column count: 41
## 
Meta line 1000 read in.
Meta line 2000 read in.
Meta line 3000 read in.
Meta line 4000 read in.
Meta line 4800 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 2668
##   Character matrix gt cols: 41
##   skip: 0
##   nrows: 2668
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant: 2668
## All variants processed
vcfR
## ***** Object of Class vcfR *****
## 32 samples
## 2668 CHROMs
## 2,668 variants
## Object size: 4.7 Mb
## 2.872 percent missing data
## *****        *****         *****
#filter by MAC thresholds
vcf.3<-min_mac(vcfR, min.mac = 3)
## 39.24% of SNPs fell below a minor allele count of 3 and were removed from the VCF

vcf.3
## ***** Object of Class vcfR *****
## 32 samples
## 1621 CHROMs
## 1,621 variants
## Object size: 3.3 Mb
## 3.499 percent missing data
## *****        *****         *****
#filter by MAC thresholds
vcf.4<-min_mac(vcfR, min.mac = 4)
## 60.31% of SNPs fell below a minor allele count of 4 and were removed from the VCF

vcf.4
## ***** Object of Class vcfR *****
## 32 samples
## 1059 CHROMs
## 1,059 variants
## Object size: 2.5 Mb
## 4.04 percent missing data
## *****        *****         *****
#filter by MAC thresholds
vcf.5<-min_mac(vcfR, min.mac = 5)
## 71.7% of SNPs fell below a minor allele count of 5 and were removed from the VCF

vcf.5
## ***** Object of Class vcfR *****
## 32 samples
## 755 CHROMs
## 755 variants
## Object size: 2 Mb
## 4.383 percent missing data
## *****        *****         *****
#filter by MAC thresholds
vcf.6<-min_mac(vcfR, min.mac = 6)
## 78% of SNPs fell below a minor allele count of 6 and were removed from the VCF

vcf.6
## ***** Object of Class vcfR *****
## 32 samples
## 587 CHROMs
## 587 variants
## Object size: 1.7 Mb
## 4.626 percent missing data
## *****        *****         *****
#filter by MAC thresholds
vcf.7<-min_mac(vcfR, min.mac = 7)
## 81.93% of SNPs fell below a minor allele count of 7 and were removed from the VCF

vcf.7
## ***** Object of Class vcfR *****
## 32 samples
## 482 CHROMs
## 482 variants
## Object size: 1.6 Mb
## 4.72 percent missing data
## *****        *****         *****
#filter by MAC thresholds
vcf.8<-min_mac(vcfR, min.mac = 8)
## 85.12% of SNPs fell below a minor allele count of 8 and were removed from the VCF

vcf.8
## ***** Object of Class vcfR *****
## 32 samples
## 397 CHROMs
## 397 variants
## Object size: 1.4 Mb
## 5.085 percent missing data
## *****        *****         *****
#filter by MAC thresholds
vcf.9<-min_mac(vcfR, min.mac = 9)
## 87.33% of SNPs fell below a minor allele count of 9 and were removed from the VCF

vcf.9
## ***** Object of Class vcfR *****
## 32 samples
## 338 CHROMs
## 338 variants
## Object size: 1.3 Mb
## 5.261 percent missing data
## *****        *****         *****
#filter by MAC thresholds
vcf.10<-min_mac(vcfR, min.mac = 10)
## 89.09% of SNPs fell below a minor allele count of 10 and were removed from the VCF

vcf.10
## ***** Object of Class vcfR *****
## 32 samples
## 291 CHROMs
## 291 variants
## Object size: 1.2 Mb
## 5.284 percent missing data
## *****        *****         *****
#write out vcf file
#vcfR::write.vcf(vcf.3, file="~/Downloads/towhee.mac3.vcf.gz")
#vcfR::write.vcf(vcf.4, file="~/Downloads/towhee.mac4.vcf.gz")
#vcfR::write.vcf(vcf.5, file="~/Downloads/towhee.mac5.vcf.gz")
#vcfR::write.vcf(vcf.6, file="~/Downloads/towhee.mac6.vcf.gz")
#vcfR::write.vcf(vcf.7, file="~/Downloads/towhee.mac7.vcf.gz")
#vcfR::write.vcf(vcf.8, file="~/Downloads/towhee.mac8.vcf.gz")
#vcfR::write.vcf(vcf.9, file="~/Downloads/towhee.mac9.vcf.gz")
#vcfR::write.vcf(vcf.10, file="~/Downloads/towhee.mac10.vcf.gz")

Visualize ADMIXTURE results for MAC 2

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac2/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:5))+
  theme_classic()

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac2/")
#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]

#reorder sampling df to match order of the plot
sample.info<-sample.info[match(samps, sample.info$ID),]
sample.info$ID == samps
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:5){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#set rownames
rownames(sample.info)<-c(1:32)
#get brewer cols
RColorBrewer::brewer.pal(n=8,"RdGy")
## [1] "#B2182B" "#D6604D" "#F4A582" "#FDDBC7" "#E0E0E0" "#BABABA" "#878787"
## [8] "#4D4D4D"
#reorder samples based on sampling locality for k=2/3
runs[[2]]<-runs[[2]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
runs[[3]]<-runs[[3]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
#plot barplots for the two most relevant runs
barplot(t(as.matrix(runs[[2]])), col=c("#B2182B","#4D4D4D"), ylab="Ancestry", border="black")
barplot(t(as.matrix(runs[[3]])), col=c("#B2182B","#4D4D4D","black"), ylab="Ancestry", border="black")

Visualize ADMIXTURE results for MAC 3

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac3/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac3/")
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:5){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#set rownames
rownames(sample.info)<-c(1:32)
#get brewer cols
RColorBrewer::brewer.pal(n=8,"RdGy")
## [1] "#B2182B" "#D6604D" "#F4A582" "#FDDBC7" "#E0E0E0" "#BABABA" "#878787"
## [8] "#4D4D4D"
#reorder samples based on sampling locality for k=2/3
runs[[2]]<-runs[[2]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
runs[[3]]<-runs[[3]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
#plot barplots for the two most relevant runs
barplot(t(as.matrix(runs[[2]])), col=c("#B2182B","#4D4D4D"), ylab="Ancestry", border="black")
barplot(t(as.matrix(runs[[3]])), col=c("#B2182B","#4D4D4D","black"), ylab="Ancestry", border="black")

Visualize ADMIXTURE results for MAC 4

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac4/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac4/")
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:5){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#set rownames
rownames(sample.info)<-c(1:32)
#get brewer cols
RColorBrewer::brewer.pal(n=8,"RdGy")
## [1] "#B2182B" "#D6604D" "#F4A582" "#FDDBC7" "#E0E0E0" "#BABABA" "#878787"
## [8] "#4D4D4D"
#reorder samples based on sampling locality for k=2/3
runs[[2]]<-runs[[2]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
runs[[3]]<-runs[[3]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
#plot barplots for the two most relevant runs
barplot(t(as.matrix(runs[[2]])), col=c("#B2182B","#4D4D4D"), ylab="Ancestry", border="black")
barplot(t(as.matrix(runs[[3]])), col=c("#B2182B","#4D4D4D","black"), ylab="Ancestry", border="black")

Visualize ADMIXTURE results for MAC 5

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac5/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac5/")
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:5){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#set rownames
rownames(sample.info)<-c(1:32)
#get brewer cols
RColorBrewer::brewer.pal(n=8,"RdGy")
## [1] "#B2182B" "#D6604D" "#F4A582" "#FDDBC7" "#E0E0E0" "#BABABA" "#878787"
## [8] "#4D4D4D"
#reorder samples based on sampling locality for k=2/3
runs[[2]]<-runs[[2]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
runs[[3]]<-runs[[3]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
#plot barplots for the two most relevant runs
barplot(t(as.matrix(runs[[2]])), col=c("#B2182B","#4D4D4D"), ylab="Ancestry", border="black")
barplot(t(as.matrix(runs[[3]])), col=c("#B2182B","#4D4D4D","black"), ylab="Ancestry", border="black")

Visualize ADMIXTURE results for MAC 6

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac6/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac6/")
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:5){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#set rownames
rownames(sample.info)<-c(1:32)
#get brewer cols
RColorBrewer::brewer.pal(n=8,"RdGy")
## [1] "#B2182B" "#D6604D" "#F4A582" "#FDDBC7" "#E0E0E0" "#BABABA" "#878787"
## [8] "#4D4D4D"
#reorder samples based on sampling locality for k=2/3
runs[[2]]<-runs[[2]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
runs[[3]]<-runs[[3]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
#plot barplots for the two most relevant runs
barplot(t(as.matrix(runs[[2]])), col=c("#B2182B","#4D4D4D"), ylab="Ancestry", border="black")
barplot(t(as.matrix(runs[[3]])), col=c("#B2182B","#4D4D4D","black"), ylab="Ancestry", border="black")

Visualize ADMIXTURE results for MAC 7

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac7/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac7/")
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:5){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#set rownames
rownames(sample.info)<-c(1:32)
#get brewer cols
RColorBrewer::brewer.pal(n=8,"RdGy")
## [1] "#B2182B" "#D6604D" "#F4A582" "#FDDBC7" "#E0E0E0" "#BABABA" "#878787"
## [8] "#4D4D4D"
#reorder samples based on sampling locality for k=2/3
runs[[2]]<-runs[[2]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
runs[[3]]<-runs[[3]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
#plot barplots for the two most relevant runs
barplot(t(as.matrix(runs[[2]])), col=c("#B2182B","#4D4D4D"), ylab="Ancestry", border="black")
barplot(t(as.matrix(runs[[3]])), col=c("#B2182B","#4D4D4D","black"), ylab="Ancestry", border="black")

Visualize ADMIXTURE results for MAC 8

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac8/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac8/")
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:5){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#set rownames
rownames(sample.info)<-c(1:32)
#get brewer cols
RColorBrewer::brewer.pal(n=8,"RdGy")
## [1] "#B2182B" "#D6604D" "#F4A582" "#FDDBC7" "#E0E0E0" "#BABABA" "#878787"
## [8] "#4D4D4D"
#reorder samples based on sampling locality for k=2/3
runs[[2]]<-runs[[2]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
runs[[3]]<-runs[[3]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
#plot barplots for the two most relevant runs
barplot(t(as.matrix(runs[[2]])), col=c("#B2182B","#4D4D4D"), ylab="Ancestry", border="black")
barplot(t(as.matrix(runs[[3]])), col=c("#B2182B","#4D4D4D","black"), ylab="Ancestry", border="black")

Visualize ADMIXTURE results for MAC 9

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac9/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac9/")
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:5){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#set rownames
rownames(sample.info)<-c(1:32)
#get brewer cols
RColorBrewer::brewer.pal(n=8,"RdGy")
## [1] "#B2182B" "#D6604D" "#F4A582" "#FDDBC7" "#E0E0E0" "#BABABA" "#878787"
## [8] "#4D4D4D"
#reorder samples based on sampling locality for k=2/3
runs[[2]]<-runs[[2]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
runs[[3]]<-runs[[3]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
#plot barplots for the two most relevant runs
barplot(t(as.matrix(runs[[2]])), col=c("#B2182B","#4D4D4D"), ylab="Ancestry", border="black")
barplot(t(as.matrix(runs[[3]])), col=c("#B2182B","#4D4D4D","black"), ylab="Ancestry", border="black")

Visualize ADMIXTURE results for MAC 10

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac10/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac10/")
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:5){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#set rownames
rownames(sample.info)<-c(1:32)
#get brewer cols
RColorBrewer::brewer.pal(n=8,"RdGy")
## [1] "#B2182B" "#D6604D" "#F4A582" "#FDDBC7" "#E0E0E0" "#BABABA" "#878787"
## [8] "#4D4D4D"
#reorder samples based on sampling locality for k=2/3
runs[[2]]<-runs[[2]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
runs[[3]]<-runs[[3]][c(7:10,32,29,30,31,3:6,23,24,21,22,27,28,25,26,11:14,15:18,2,1,19,20),]
#plot barplots for the two most relevant runs
barplot(t(as.matrix(runs[[2]])), col=c("#B2182B","#4D4D4D"), ylab="Ancestry", border="black")
barplot(t(as.matrix(runs[[3]])), col=c("#B2182B","#4D4D4D","black"), ylab="Ancestry", border="black")

Reorganize barplots to match geography of the transect

#setwd to admixture directory run on the cluster
setwd("~/Desktop/mex.towhees/admixture.mac10/")

#save barplots
#pdf("~/Desktop/mex.towhees/admixture/admix.plots.pdf", width = 8, height=3.3)
#par(mfrow=c(2,1))
#par(mar = c(3, 3, 0, 0), oma = c(1, 1, 1, 1)) #set margins
#barplot(t(as.matrix(runs[[2]])), col=c("#B2182B","#4D4D4D"), ylab="Ancestry", border="black")
#barplot(t(as.matrix(runs[[3]])), col=c("#B2182B","#4D4D4D","black"), ylab="Ancestry", border="black")
#dev.off()